{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "OpenSpielTutorial.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "odj1Coq5H080"
      },
      "source": [
        "#@title ##### License { display-mode: \"form\" }\n",
        "# Copyright 2019 DeepMind Technologies Ltd. All rights reserved.\n",
        "#\n",
        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "#     http://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "dOOzDGYAZcW3"
      },
      "source": [
        "# OpenSpiel\n",
        "\n",
        "* This Colab gets you started the basics of OpenSpiel.\n",
        "* OpenSpiel is a framework for reinforcement learning in games. The code is hosted [on github](https://github.com/deepmind/open_spiel/).\n",
        "* There is an accompanying video tutorial that works through this colab. It will be linked here once it is live.\n",
        "* There is also an [OpenSpiel paper](https://arxiv.org/abs/1908.09453) with more detail."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XC6kQBzWahEF"
      },
      "source": [
        "## Install"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-2_Vbijh4FlZ"
      },
      "source": [
        "The following command will install OpenSpiel via pip.\n",
        "\n",
        "Only the required dependencies are installed. You may need other dependencies if you use some of the algorithms. There is a [the complete list of packages and versions](https://github.com/deepmind/open_spiel/blob/master/open_spiel/scripts/python_extra_deps.sh) we install for the CI tests, which can be installed as necessary.\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lQc12Xrn4CXU"
      },
      "source": [
        "!pip install --upgrade open_spiel"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jUtlXZ8FBnAL"
      },
      "source": [
        "# Part 1. OpenSpiel API Basics."
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Importing pyspiel and showing the list of supported games.\n",
        "import pyspiel\n",
        "print(pyspiel.registered_names())"
      ],
      "metadata": {
        "id": "bDXdNLJbsZaD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Loading a game (with no/default parameters).\n",
        "game = pyspiel.load_game(\"tic_tac_toe\")\n",
        "print(game)"
      ],
      "metadata": {
        "id": "74glfO8dsmPn"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Some properties of the games.\n",
        "print(game.num_players())\n",
        "print(game.max_utility())\n",
        "print(game.min_utility())\n",
        "print(game.num_distinct_actions())"
      ],
      "metadata": {
        "id": "tthnjDQxuuW1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Creating initial states.\n",
        "state = game.new_initial_state()\n",
        "print(state)"
      ],
      "metadata": {
        "id": "po2CYySVu-rC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Basic information about states.\n",
        "print(state.current_player())\n",
        "print(state.is_terminal())\n",
        "print(state.returns())\n",
        "print(state.legal_actions())"
      ],
      "metadata": {
        "id": "ZxXCiDjXvNMQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Playing the game: applying actions.\n",
        "state = game.new_initial_state()\n",
        "state.apply_action(1)\n",
        "print(state)\n",
        "print(state.current_player())\n",
        "state.apply_action(2)\n",
        "state.apply_action(4)\n",
        "state.apply_action(0)\n",
        "state.apply_action(7)\n",
        "print(state)\n",
        "print(state.is_terminal())\n",
        "print(state.player_return(0))   # win for x (player 0)\n",
        "print(state.current_player())"
      ],
      "metadata": {
        "id": "GQypywhgvh6t"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Different game: Breakthrough with default parameters (number of rows and columns are both 8)\n",
        "game = pyspiel.load_game(\"breakthrough\")\n",
        "state = game.new_initial_state()\n",
        "print(state)"
      ],
      "metadata": {
        "id": "fxu3ZTxxvmrW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Parameterized games: loading a 6x6 Breakthrough.\n",
        "game = pyspiel.load_game(\"breakthrough(rows=6,columns=6)\")\n",
        "state = game.new_initial_state()\n",
        "print(state)\n",
        "print(state.legal_actions())\n",
        "print(game.num_distinct_actions())\n",
        "for action in state.legal_actions():\n",
        "  print(f\"{action} {state.action_to_string(action)}\")"
      ],
      "metadata": {
        "id": "rQV0169-wuLI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Part 2. Normal-form Games and Evolutionary Dynamics in OpenSpiel."
      ],
      "metadata": {
        "id": "PeB3zc8AzDlZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import pyspiel\n",
        "game = pyspiel.create_matrix_game([[1, -1], [-1, 1]], [[-1, 1], [1, -1]])\n",
        "print(game)   # name not provided: uses a default\n",
        "state = game.new_initial_state()\n",
        "print(state)  # action names also not provided; defaults used"
      ],
      "metadata": {
        "id": "u2eRTZr4zm_G"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Normal-form games are 1-step simultaneous-move games.\n",
        "print(state.current_player())    # special player id \n",
        "print(state.legal_actions(0))    # query legal actions for each player\n",
        "print(state.legal_actions(1))\n",
        "print(state.is_terminal())\n"
      ],
      "metadata": {
        "id": "N6E0hG4J0TaI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Applying a joint action (one action per player)\n",
        "state.apply_actions([0, 0])\n",
        "print(state.is_terminal())\n",
        "print(state.returns())"
      ],
      "metadata": {
        "id": "RPfvosEU0pt9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Evolutionary dynamics in Rock, Paper, Scissors\n",
        "from open_spiel.python.egt import dynamics\n",
        "from open_spiel.python.egt.utils import game_payoffs_array\n",
        "import numpy as np\n",
        "\n",
        "game = pyspiel.load_matrix_game(\"matrix_rps\")   # load the Rock, Paper, Scissors matrix game\n",
        "payoff_matrix = game_payoffs_array(game)        # convert any normal-form game to a numpy payoff matrix\n",
        "\n",
        "dyn = dynamics.SinglePopulationDynamics(payoff_matrix, dynamics.replicator)\n",
        "x = np.array([0.2, 0.2, 0.6])                   # population heavily-weighted toward scissors\n",
        "dyn(x)"
      ],
      "metadata": {
        "id": "fq4NRSrz04xe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Choose a step size and apply the dynamic\n",
        "alpha = 0.01\n",
        "x += alpha * dyn(x)\n",
        "print(x)\n",
        "x += alpha * dyn(x)\n",
        "print(x)\n",
        "x += alpha * dyn(x)\n",
        "x += alpha * dyn(x)\n",
        "x += alpha * dyn(x)\n",
        "x += alpha * dyn(x)\n",
        "print(x)"
      ],
      "metadata": {
        "id": "jPzX2HWK1VvJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Part 3. Chance Nodes and Partially-Observable Games."
      ],
      "metadata": {
        "id": "p-i_tT8HzLU1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Kuhn poker: simplified poker with a 3-card deck (https://en.wikipedia.org/wiki/Kuhn_poker)\n",
        "import pyspiel\n",
        "game = pyspiel.load_game(\"kuhn_poker\")\n",
        "print(game.num_distinct_actions())    # bet and fold\n"
      ],
      "metadata": {
        "id": "bA6hgOQW2iUz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Chance nodes.\n",
        "state = game.new_initial_state()\n",
        "print(state.current_player())     # special chance player id\n",
        "print(state.is_chance_node())\n",
        "print(state.chance_outcomes())    # distibution over outcomes as a list of (outcome, probability) pairs"
      ],
      "metadata": {
        "id": "RxVzdLjU2zWM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Applying chance node outcomes: same function as applying actions.\n",
        "state.apply_action(0)              # let's choose the first card (jack)\n",
        "print(state.is_chance_node())      # still at a chance node (player 2's card).\n",
        "print(state.chance_outcomes())     # jack no longer a possible outcome\n",
        "state.apply_action(1)              # second player gets the queen\n",
        "print(state.current_player())      # no longer chance node, time to play!"
      ],
      "metadata": {
        "id": "avTQrpRA3OOQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# States vs. information states\n",
        "print(state)                              # ground/world state (all information open)\n",
        "print(state.legal_actions())\n",
        "for action in state.legal_actions():\n",
        "  print(state.action_to_string(action))\n",
        "print(state.information_state_string())   # only current player's information!"
      ],
      "metadata": {
        "id": "UHZ7vU_V4SZm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Take an action (pass / check), second player's turn.\n",
        "# Information state tensor is vector of floats (often bits) representing the information state.\n",
        "state.apply_action(0)\n",
        "print(state.current_player())\n",
        "print(state.information_state_string())   # now contains second player's card and the public action sequence\n",
        "print(state.information_state_tensor())"
      ],
      "metadata": {
        "id": "RuzH-yOK4xmg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Leduc poker is a larger game (6 cards, two suits), 3 actions: fold, check/call, raise.\n",
        "game = pyspiel.load_game(\"leduc_poker\")\n",
        "print(game.num_distinct_actions())\n",
        "state = game.new_initial_state()\n",
        "print(state)\n",
        "state.apply_action(0)     # first player gets first jack \n",
        "state.apply_action(1)     # second player gets second jack\n",
        "print(state.current_player())\n",
        "print(state.information_state_string())\n",
        "print(state.information_state_tensor())\n"
      ],
      "metadata": {
        "id": "tmJbLdme5P8a"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Let's check until the second round.\n",
        "print(state.legal_actions_mask())    # Helper function for neural networks.\n",
        "state.apply_action(1)     # check\n",
        "state.apply_action(1)     # check\n",
        "print(state)\n",
        "print(state.chance_outcomes())   # public card (4 left in the deck)\n",
        "state.apply_action(2)\n",
        "print(state.information_state_string())   # player 0's turn again."
      ],
      "metadata": {
        "id": "4MwssaTo58yO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Part 4. Basic RL: Self-play Q-Learning in Tic-Tac-Toe."
      ],
      "metadata": {
        "id": "3PGnADszzbNP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Let's do independent Q-learning in Tic-Tac-Toe, and play it against random.\n",
        "# RL is based on python/examples/independent_tabular_qlearning.py\n",
        "from open_spiel.python import rl_environment\n",
        "from open_spiel.python import rl_tools\n",
        "from open_spiel.python.algorithms import tabular_qlearner\n",
        "\n",
        "# Create the environment\n",
        "env = rl_environment.Environment(\"tic_tac_toe\")\n",
        "num_players = env.num_players\n",
        "num_actions = env.action_spec()[\"num_actions\"]\n",
        "\n",
        "# Create the agents\n",
        "agents = [\n",
        "    tabular_qlearner.QLearner(player_id=idx, num_actions=num_actions)\n",
        "    for idx in range(num_players)\n",
        "]"
      ],
      "metadata": {
        "id": "EnfdHFr7621m"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Train the Q-learning agents in self-play.\n",
        "for cur_episode in range(25000):\n",
        "  if cur_episode % 1000 == 0:\n",
        "    print(f\"Episodes: {cur_episode}\")\n",
        "  time_step = env.reset()\n",
        "  while not time_step.last():\n",
        "    player_id = time_step.observations[\"current_player\"]\n",
        "    agent_output = agents[player_id].step(time_step)\n",
        "    time_step = env.step([agent_output.action])\n",
        "  # Episode is over, step all agents with final info state.\n",
        "  for agent in agents:\n",
        "    agent.step(time_step)\n",
        "print(\"Done!\")"
      ],
      "metadata": {
        "id": "mDgnvsjZ7vZI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluate the Q-learning agent against a random agent.\n",
        "from open_spiel.python.algorithms import random_agent\n",
        "eval_agents = [agents[0], random_agent.RandomAgent(1, num_actions, \"Entropy Master 2000\") ]\n",
        "\n",
        "time_step = env.reset()\n",
        "while not time_step.last():\n",
        "  print(\"\")\n",
        "  print(env.get_state)\n",
        "  player_id = time_step.observations[\"current_player\"]\n",
        "  # Note the evaluation flag. A Q-learner will set epsilon=0 here.\n",
        "  agent_output = eval_agents[player_id].step(time_step, is_evaluation=True)\n",
        "  print(f\"Agent {player_id} chooses {env.get_state.action_to_string(agent_output.action)}\")\n",
        "  time_step = env.step([agent_output.action])\n",
        "\n",
        "print(\"\")\n",
        "print(env.get_state)\n",
        "print(time_step.rewards)\n"
      ],
      "metadata": {
        "id": "3GPNio828vyg"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
